/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

package config

import (
	"context"
	"errors"
	"fmt"
	"net/url"
	gruntime "runtime"
	"slices"
	"time"

	"github.com/containerd/log"
	"github.com/pelletier/go-toml/v2"
	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
	"k8s.io/kubelet/pkg/cri/streaming"

	runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
	runcoptions "github.com/containerd/containerd/api/types/runc/options"
	runtimeoptions "github.com/containerd/containerd/api/types/runtimeoptions/v1"
	"github.com/containerd/containerd/v2/internal/cri/annotations"
	"github.com/containerd/containerd/v2/internal/cri/opts"
	"github.com/containerd/containerd/v2/pkg/deprecation"
	"github.com/containerd/containerd/v2/plugins"
)

const (
	// defaultImagePullProgressTimeoutDuration is the default value of imagePullProgressTimeout.
	//
	// NOTE:
	//
	// The ImagePullProgressTimeout feature is ported from kubelet/dockershim's
	// --image-pull-progress-deadline, whose original value is 1m0. Unlike the
	// docker daemon, containerd doesn't have a global concurrent download
	// limitation before migrating to Transfer Service. If kubelet runs with
	// concurrent image pulls, the node will run under IO pressure, and an
	// image pull can be slowed down by its own activity when the target image
	// is a large one with a lot of layers. In addition, both the containers'
	// writable layers and the image storage share one disk. The image pull
	// process commits blobs to the content store with fsync, which might
	// bring unrelated files' dirty pages to disk in one transaction [1].
	// The 1m0 value isn't good enough. Based on the #9347 case and the
	// kubernetes community's usage [2], the default value is updated to 5m0.
	// If end-users still run into unexpected cancellation, they need to
	// configure it based on their environment.
	//
	// [1]: Fast commits for ext4 - https://lwn.net/Articles/842385/
	// [2]: https://github.com/kubernetes/kubernetes/blob/1635c380b26a1d8cc25d36e9feace9797f4bae3c/cluster/gce/util.sh#L882
	defaultImagePullProgressTimeoutDuration = 5 * time.Minute
)

// SandboxControllerMode is a string type naming a sandbox controller
// implementation; see ModePodSandbox and ModeShim for the declared values.
type SandboxControllerMode string

const (
	// ModePodSandbox means use the Controller implementation from the sbserver podsandbox package.
	// We take this one as the default mode.
	ModePodSandbox SandboxControllerMode = "podsandbox"
	// ModeShim means use whatever Controller implementation is provided by the shim.
	ModeShim SandboxControllerMode = "shim"
	// DefaultSandboxImage is the default image to use for sandboxes when empty or
	// for default configurations.
	DefaultSandboxImage = "registry.k8s.io/pause:3.10"
	// IOTypeFifo is container io implemented by creating a named pipe.
	IOTypeFifo = "fifo"
	// IOTypeStreaming is container io implemented by connecting the streaming api to the sandbox endpoint.
	IOTypeStreaming = "streaming"
)

// Runtime holds the configuration of a single runtime: its type, an optional
// shim binary path, the annotations to pass through, runtime options, and the
// per-runtime CNI, snapshotter, sandboxer and IO settings.
type Runtime struct {
	// Type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
	Type string `toml:"runtime_type" json:"runtimeType"`
	// Path is an optional field that can be used to overwrite path to a shim runtime binary.
	// When specified, containerd will ignore runtime name field when resolving shim location.
	// Path must be an absolute path.
	Path string `toml:"runtime_path" json:"runtimePath"`
	// PodAnnotations is a list of pod annotations passed to both pod sandbox as well as
	// container OCI annotations.
	PodAnnotations []string `toml:"pod_annotations" json:"PodAnnotations"`
	// ContainerAnnotations is a list of container annotations passed through to the OCI config of the containers.
	// Container annotations in CRI are usually generated by other Kubernetes node components (i.e., not users).
	// Currently, only device plugins populate the annotations.
	ContainerAnnotations []string `toml:"container_annotations" json:"ContainerAnnotations"`
	// Options are config options for the runtime.
	Options map[string]any `toml:"options" json:"options"`
	// PrivilegedWithoutHostDevices overloads the default behaviour for adding host devices to the
	// runtime spec when the container is privileged. Defaults to false.
	PrivilegedWithoutHostDevices bool `toml:"privileged_without_host_devices" json:"privileged_without_host_devices"`
	// PrivilegedWithoutHostDevicesAllDevicesAllowed overloads the default device allowlisting
	// behaviour applied to the runtime spec of a privileged container when
	// PrivilegedWithoutHostDevices is already enabled. Requires
	// PrivilegedWithoutHostDevices to be enabled. Defaults to false.
	PrivilegedWithoutHostDevicesAllDevicesAllowed bool `toml:"privileged_without_host_devices_all_devices_allowed" json:"privileged_without_host_devices_all_devices_allowed"`
	// CgroupWritable enables writable cgroups in non-privileged containers.
	CgroupWritable bool `toml:"cgroup_writable" json:"cgroupWritable"`
	// BaseRuntimeSpec is a json file with the OCI spec to use as the base spec that all containers will be created from.
	BaseRuntimeSpec string `toml:"base_runtime_spec" json:"baseRuntimeSpec"`
	// NetworkPluginConfDir is a directory containing the CNI network information for the runtime class.
	NetworkPluginConfDir string `toml:"cni_conf_dir" json:"cniConfDir"`
	// NetworkPluginMaxConfNum is the max number of plugin config files that will
	// be loaded from the cni config directory by go-cni. Set the value to 0 to
	// load all config files (no arbitrary limit). The legacy default value is 1.
	NetworkPluginMaxConfNum int `toml:"cni_max_conf_num" json:"cniMaxConfNum"`
	// Snapshotter sets the snapshotter at runtime level instead of making it a global configuration.
	// An example use case is to use devmapper or other snapshotters in Kata containers for performance and security
	// while using default snapshotters for operational simplicity.
	// See https://github.com/containerd/containerd/issues/6657 for details.
	Snapshotter string `toml:"snapshotter" json:"snapshotter"`
	// Sandboxer defines which sandbox runtime to use when scheduling pods.
	// This feature requires the new CRI server implementation (enabled by default in 2.0).
	// shim - means use whatever Controller implementation provided by shim (e.g. use RemoteController).
	// podsandbox - means use Controller implementation from sbserver podsandbox package.
	Sandboxer string `toml:"sandboxer" json:"sandboxer"`
	// IOType defines how containerd transfers the io streams of the container.
	// If it is not set, a named pipe will be created for the container.
	// It can also be set to "streaming" to create a stream by the streaming api
	// and use it as a channel to transfer the io stream.
	IOType string `toml:"io_type" json:"io_type"`
}

// ContainerdConfig contains toml config related to containerd.
type ContainerdConfig struct {
	// DefaultRuntimeName is the default runtime name to use from the runtimes table.
	// ValidateRuntimeConfig rejects an empty value and a name that has no
	// entry in Runtimes.
	DefaultRuntimeName string `toml:"default_runtime_name" json:"defaultRuntimeName"`

	// Runtimes is a map from CRI RuntimeHandler strings, which specify types of runtime
	// configurations, to the matching configurations.
	Runtimes map[string]Runtime `toml:"runtimes" json:"runtimes"`

	// IgnoreBlockIONotEnabledErrors is a boolean flag to ignore
	// blockio related errors when blockio support has not been
	// enabled.
	IgnoreBlockIONotEnabledErrors bool `toml:"ignore_blockio_not_enabled_errors" json:"ignoreBlockIONotEnabledErrors"`

	// IgnoreRdtNotEnabledErrors is a boolean flag to ignore RDT related errors
	// when RDT support has not been enabled.
	IgnoreRdtNotEnabledErrors bool `toml:"ignore_rdt_not_enabled_errors" json:"ignoreRdtNotEnabledErrors"`
}

// CniConfig contains toml config related to cni.
type CniConfig struct {
	// NetworkPluginBinDir is the directory in which the binaries for the plugin are kept.
	//
	// DEPRECATED: use `NetworkPluginBinDirs` instead.
	NetworkPluginBinDir string `toml:"bin_dir" json:"binDir"`
	// NetworkPluginBinDirs is the list of directories in which the binaries for the plugin are kept.
	//
	// Only use one of NetworkPluginBinDir and NetworkPluginBinDirs, not both.
	NetworkPluginBinDirs []string `toml:"bin_dirs" json:"binDirs"`
	// NetworkPluginConfDir is the directory in which the admin places a CNI conf.
	NetworkPluginConfDir string `toml:"conf_dir" json:"confDir"`
	// NetworkPluginMaxConfNum is the max number of plugin config files that will
	// be loaded from the cni config directory by go-cni. Set the value to 0 to
	// load all config files (no arbitrary limit). The legacy default value is 1.
	NetworkPluginMaxConfNum int `toml:"max_conf_num" json:"maxConfNum"`
	// NetworkPluginSetupSerially is a boolean flag to specify whether containerd sets up networks serially
	// if there are multiple CNI plugin config files existing and NetworkPluginMaxConfNum is larger than 1.
	//
	// NOTE: On the Linux platform, containerd provides loopback network
	// configuration by default. There are at least two network plugins.
	// The default value of NetworkPluginSetupSerially is false which means
	// the loopback and eth0 are handled in parallel mode. Since the loopback
	// device is created as the net namespace is created, it's safe to run
	// in parallel mode as the default setting.
	NetworkPluginSetupSerially bool `toml:"setup_serially" json:"setupSerially"`
	// NetworkPluginConfTemplate is the file path of the golang template used to generate the cni config.
	// When it is set, containerd will get cidr(s) from kubelet to replace {{.PodCIDR}},
	// {{.PodCIDRRanges}} or {{.Routes}} in the template, and write the config into
	// NetworkPluginConfDir.
	// Ideally the cni config should be placed by the system admin or a cni daemon like calico,
	// weaveworks etc. However, this is useful for the cases when there is no cni daemonset to place the cni config.
	// This allows for very simple generic networking using the Kubernetes built-in node pod CIDR IPAM, avoiding the
	// need to fetch the node object through some external process (which has scalability, auth, complexity issues).
	// It is currently heavily used in kubernetes-containerd CI testing.
	// NetworkPluginConfTemplate was once deprecated in containerd v1.7.0,
	// but its deprecation was cancelled in v1.7.3.
	NetworkPluginConfTemplate string `toml:"conf_template" json:"confTemplate"`
	// IPPreference specifies the strategy to use when selecting the main IP address for a pod.
	//
	// Options include:
	// * ipv4, "" - (default) select the first ipv4 address
	// * ipv6 - select the first ipv6 address
	// * cni - use the order returned by the CNI plugins, returning the first IP address from the results
	IPPreference string `toml:"ip_pref" json:"ipPref"`
	// UseInternalLoopback specifies whether the internal mechanism, rather than
	// the CNI loopback plugin, is used to set lo to up.
	UseInternalLoopback bool `toml:"use_internal_loopback" json:"useInternalLoopback"`
}

// Mirror contains the config related to the registry mirror.
type Mirror struct {
	// Endpoints are endpoints for a namespace. The CRI plugin will try the endpoints
	// one by one until a working one is found. Each endpoint must be a valid url
	// with host specified.
	// The scheme, host and path from the endpoint URL will be used.
	Endpoints []string `toml:"endpoint" json:"endpoint"`
}

// AuthConfig contains the config related to authentication to a specific registry.
type AuthConfig struct {
	// Username is the username to log in to the registry.
	Username string `toml:"username" json:"username"`
	// Password is the password to log in to the registry.
	Password string `toml:"password" json:"password"`
	// Auth is a base64 encoded string from the concatenation of the username,
	// a colon, and the password.
	Auth string `toml:"auth" json:"auth"`
	// IdentityToken is used to authenticate the user and get
	// an access token for the registry.
	IdentityToken string `toml:"identitytoken" json:"identitytoken"`
}

// Registry contains the registry settings.
type Registry struct {
	// ConfigPath is a path to the root directory containing registry-specific
	// configurations.
	// If ConfigPath is set, the rest of the registry specific options are ignored.
	ConfigPath string `toml:"config_path" json:"configPath"`
	// Mirrors is the namespace to mirror mapping for all namespaces.
	// This option will not be used when ConfigPath is provided.
	// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.1.
	// Supported in 1.x releases.
	Mirrors map[string]Mirror `toml:"mirrors" json:"mirrors"`
	// Configs are configs for each registry.
	// The key is the domain name or IP of the registry.
	// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.1.
	// Supported in 1.x releases.
	Configs map[string]RegistryConfig `toml:"configs" json:"configs"`
	// Auths is the registry endpoint to auth config mapping. The registry endpoint must
	// be a valid url with host specified.
	// DEPRECATED: Use ConfigPath instead. Remove in containerd 2.1.
	// Supported in 1.x releases.
	Auths map[string]AuthConfig `toml:"auths" json:"auths"`
	// Headers adds additional HTTP headers that get sent to all registries.
	Headers map[string][]string `toml:"headers" json:"headers"`
}

// RegistryConfig contains configuration used to communicate with the registry.
type RegistryConfig struct {
	// Auth contains information to authenticate to the registry.
	// It is a pointer, so it may be nil when no auth is configured.
	Auth *AuthConfig `toml:"auth" json:"auth"`
}

// ImageDecryption contains configuration for handling decryption of encrypted container images.
type ImageDecryption struct {
	// KeyModel specifies the trust model of where keys should reside.
	//
	// Details of field usage can be found in:
	// https://github.com/containerd/containerd/tree/main/docs/cri/config.md
	//
	// Details of key models can be found in:
	// https://github.com/containerd/containerd/tree/main/docs/cri/decryption.md
	KeyModel string `toml:"key_model" json:"keyModel"`
}

// ImagePlatform represents the platform to use for an image including the
// snapshotter to use. If snapshotter is not provided, the platform default
// can be assumed. When platform is not provided, the default platform can
// be assumed.
type ImagePlatform struct {
	// Platform is the platform to use for the image.
	// NOTE(review): presumably a platform specifier string such as
	// "linux/amd64" — confirm against the code that parses it.
	Platform string `toml:"platform" json:"platform"`
	// Snapshotter sets the snapshotter for this image platform instead of relying on the global configuration.
	// An example use case is to use devmapper or other snapshotters in Kata containers for performance and security
	// while using default snapshotters for operational simplicity.
	// See https://github.com/containerd/containerd/issues/6657 for details.
	Snapshotter string `toml:"snapshotter" json:"snapshotter"`
}

// ImageConfig contains the image-related configuration: snapshotter
// selection, pinned images, per-runtime image platforms, registry settings,
// image decryption and image pull behaviour.
type ImageConfig struct {
	// Snapshotter is the snapshotter used by containerd.
	Snapshotter string `toml:"snapshotter" json:"snapshotter"`

	// DisableSnapshotAnnotations disables passing additional annotations (image
	// related information) to snapshotters. These annotations are required by
	// stargz snapshotter (https://github.com/containerd/stargz-snapshotter).
	DisableSnapshotAnnotations bool `toml:"disable_snapshot_annotations" json:"disableSnapshotAnnotations"`

	// DiscardUnpackedLayers is a boolean flag to specify whether to allow GC to
	// remove layers from the content store after successfully unpacking these
	// layers to the snapshotter.
	DiscardUnpackedLayers bool `toml:"discard_unpacked_layers" json:"discardUnpackedLayers"`

	// PinnedImages are images which the CRI plugin uses and should not be
	// removed by the CRI client. The images have a key which can be used
	// by other plugins to lookup the current image name.
	// Image names should be full names including domain and tag.
	// Examples:
	//   "sandbox": "registry.k8s.io/pause:3.10"
	//   "base": "docker.io/library/ubuntu:latest"
	// Migrated from:
	// (PluginConfig).SandboxImage string `toml:"sandbox_image" json:"sandboxImage"`
	PinnedImages map[string]string `toml:"pinned_images" json:"pinned_images"`

	// RuntimePlatforms is a map between the runtime and the image platform to
	// use for that runtime. When resolving an image for a runtime, this
	// mapping will be used to select the image for the platform and the
	// snapshotter for unpacking.
	RuntimePlatforms map[string]ImagePlatform `toml:"runtime_platforms" json:"runtimePlatforms"`

	// Registry contains config related to the registry.
	Registry Registry `toml:"registry" json:"registry"`

	// ImageDecryption contains config related to handling decryption of encrypted container images.
	ImageDecryption `toml:"image_decryption" json:"imageDecryption"`

	// MaxConcurrentDownloads restricts the number of concurrent downloads for each image.
	// TODO: Migrate to transfer service
	MaxConcurrentDownloads int `toml:"max_concurrent_downloads" json:"maxConcurrentDownloads"`

	// ConcurrentLayerFetchBuffer restricts the maximum concurrent chunk size for each image during a download.
	ConcurrentLayerFetchBuffer int `toml:"concurrent_layer_fetch_buffer" json:"concurrentLayerFetchBuffer"`

	// ImagePullProgressTimeout is the maximum duration that there is no
	// image data read from image registry in the open connection. It will
	// be reset whenever a new byte has been read. On timeout, the image
	// pull will be cancelled. A zero value means there is no timeout.
	//
	// The string is in the golang duration format, see:
	//   https://golang.org/pkg/time/#ParseDuration
	ImagePullProgressTimeout string `toml:"image_pull_progress_timeout" json:"imagePullProgressTimeout"`

	// ImagePullWithSyncFs is an experimental setting. It forces a filesystem
	// sync during unpacking to ensure data integrity.
	// TODO: Migrate to transfer service
	ImagePullWithSyncFs bool `toml:"image_pull_with_sync_fs" json:"imagePullWithSyncFs"`

	// StatsCollectPeriod is the period (in seconds) of snapshots stats collection.
	StatsCollectPeriod int `toml:"stats_collect_period" json:"statsCollectPeriod"`

	// UseLocalImagePull uses client.Pull to pull images locally, instead of containerd's Transfer Service.
	// By default it is set to false, i.e. use transfer service to pull images.
	// When transfer service is used to pull images, pull related configs, like max_concurrent_downloads
	// and unpack_config are configured under [plugins."io.containerd.transfer.v1.local"]
	UseLocalImagePull bool `toml:"use_local_image_pull" json:"useLocalImagePull"`
}

// RuntimeConfig contains toml config related to CRI plugin,
// it is a subset of Config.
type RuntimeConfig struct {
	// ContainerdConfig contains config related to containerd.
	ContainerdConfig `toml:"containerd" json:"containerd"`
	// CniConfig contains config related to cni.
	CniConfig `toml:"cni" json:"cni"`
	// EnableSelinux indicates to enable the selinux support.
	EnableSelinux bool `toml:"enable_selinux" json:"enableSelinux"`
	// SelinuxCategoryRange allows the upper bound on the category range to be set.
	// If not specified or set to 0, defaults to 1024 from the selinux package.
	SelinuxCategoryRange int `toml:"selinux_category_range" json:"selinuxCategoryRange"`
	// MaxContainerLogLineSize is the maximum log line size in bytes for a container.
	// Log lines longer than the limit will be split into multiple lines. A non-positive
	// value means no limit.
	MaxContainerLogLineSize int `toml:"max_container_log_line_size" json:"maxContainerLogLineSize"`
	// DisableApparmor indicates to disable the apparmor support.
	// This is useful when containerd does not have permission to access Apparmor.
	DisableApparmor bool `toml:"disable_apparmor" json:"disableApparmor"`
	// RestrictOOMScoreAdj indicates to limit the lower bound of OOMScoreAdj to containerd's
	// current OOMScoreAdj.
	// This is useful when containerd does not have permission to decrease OOMScoreAdj.
	RestrictOOMScoreAdj bool `toml:"restrict_oom_score_adj" json:"restrictOOMScoreAdj"`
	// DisableProcMount disables Kubernetes ProcMount support. This MUST be set to `true`
	// when using containerd with Kubernetes <=1.11.
	DisableProcMount bool `toml:"disable_proc_mount" json:"disableProcMount"`
	// UnsetSeccompProfile is the profile containerd/cri will use if the provided seccomp profile is
	// unset (`""`) for a container (default is `unconfined`).
	UnsetSeccompProfile string `toml:"unset_seccomp_profile" json:"unsetSeccompProfile"`
	// TolerateMissingHugetlbController if set to false will error out on create/update
	// container requests with huge page limits if the cgroup controller for hugepages is not present.
	// This helps with supporting Kubernetes <=1.18 out of the box. (default is `true`)
	TolerateMissingHugetlbController bool `toml:"tolerate_missing_hugetlb_controller" json:"tolerateMissingHugetlbController"`
	// DisableHugetlbController indicates to silently disable the hugetlb controller, even when it is
	// present in /sys/fs/cgroup/cgroup.controllers.
	// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
	DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
	// DeviceOwnershipFromSecurityContext changes the default behavior of setting container devices uid/gid
	// from CRI's SecurityContext (RunAsUser/RunAsGroup) instead of taking host's uid/gid. Defaults to false.
	DeviceOwnershipFromSecurityContext bool `toml:"device_ownership_from_security_context" json:"device_ownership_from_security_context"`
	// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
	// isolation, security and early detection of issues in the mount configuration when using
	// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
	IgnoreImageDefinedVolumes bool `toml:"ignore_image_defined_volumes" json:"ignoreImageDefinedVolumes"`
	// NetNSMountsUnderStateDir places all mounts for network namespaces under StateDir/netns instead
	// of being placed under the hardcoded directory /var/run/netns. Changing this setting requires
	// that all containers are deleted.
	NetNSMountsUnderStateDir bool `toml:"netns_mounts_under_state_dir" json:"netnsMountsUnderStateDir"`
	// EnableUnprivilegedPorts configures net.ipv4.ip_unprivileged_port_start=0
	// for all containers which are not using host network
	// and if it is not overwritten by PodSandboxConfig.
	EnableUnprivilegedPorts bool `toml:"enable_unprivileged_ports" json:"enableUnprivilegedPorts"`
	// EnableUnprivilegedICMP configures net.ipv4.ping_group_range="0 2147483647"
	// for all containers which are not using host network, are not running in user namespace
	// and if it is not overwritten by PodSandboxConfig.
	EnableUnprivilegedICMP bool `toml:"enable_unprivileged_icmp" json:"enableUnprivilegedICMP"`
	// EnableCDI indicates to enable injection of the Container Device Interface Specifications
	// into the OCI config.
	// For more details about CDI and the syntax of CDI Spec files please refer to
	// https://tags.cncf.io/container-device-interface.
	EnableCDI bool `toml:"enable_cdi" json:"enableCDI"`
	// CDISpecDirs is the list of directories to scan for Container Device Interface Specifications.
	// For more details about CDI configuration please refer to
	// https://tags.cncf.io/container-device-interface#containerd-configuration
	CDISpecDirs []string `toml:"cdi_spec_dirs" json:"cdiSpecDirs"`
	// DrainExecSyncIOTimeout is the maximum duration to wait for the ExecSync
	// API's IO EOF event after the exec init process exits. A zero value means
	// there is no timeout.
	//
	// The string is in the golang duration format, see:
	//   https://golang.org/pkg/time/#ParseDuration
	//
	// For example, the value can be '5h', '2h30m', '10s'.
	DrainExecSyncIOTimeout string `toml:"drain_exec_sync_io_timeout" json:"drainExecSyncIOTimeout"`

	// IgnoreDeprecationWarnings is the list of the deprecation IDs (such as "io.containerd.deprecation/pull-schema-1-image")
	// that should be ignored for checking "ContainerdHasNoDeprecationWarnings" condition.
	IgnoreDeprecationWarnings []string `toml:"ignore_deprecation_warnings" json:"ignoreDeprecationWarnings"`
}

// X509KeyPairStreaming contains the x509 configuration for streaming.
type X509KeyPairStreaming struct {
	// TLSCertFile is the path to a certificate file.
	TLSCertFile string `toml:"tls_cert_file" json:"tlsCertFile"`
	// TLSKeyFile is the path to a private key file.
	TLSKeyFile string `toml:"tls_key_file" json:"tlsKeyFile"`
}

// Config contains all configurations for the CRI runtime plugin: the embedded
// RuntimeConfig plus the containerd root directory, the containerd endpoint
// and the plugin's own root and state directories.
type Config struct {
	// RuntimeConfig is the config for CRI runtime.
	RuntimeConfig
	// ContainerdRootDir is the root directory path for containerd.
	ContainerdRootDir string `json:"containerdRootDir"`
	// ContainerdEndpoint is the containerd endpoint path.
	ContainerdEndpoint string `json:"containerdEndpoint"`
	// RootDir is the root directory path for managing cri plugin files
	// (metadata checkpoint etc.).
	RootDir string `json:"rootDir"`
	// StateDir is the root directory path for managing volatile pod/container data.
	StateDir string `json:"stateDir"`
}

// ServerConfig contains all the configuration for the CRI API server.
type ServerConfig struct {
	// DisableTCPService disables serving CRI on the TCP server.
	DisableTCPService bool `toml:"disable_tcp_service" json:"disableTCPService"`
	// StreamServerAddress is the ip address the streaming server is listening on.
	StreamServerAddress string `toml:"stream_server_address" json:"streamServerAddress"`
	// StreamServerPort is the port the streaming server is listening on.
	StreamServerPort string `toml:"stream_server_port" json:"streamServerPort"`
	// StreamIdleTimeout is the maximum time a streaming connection
	// can be idle before the connection is automatically closed.
	// The string is in the golang duration format, see:
	//   https://golang.org/pkg/time/#ParseDuration
	StreamIdleTimeout string `toml:"stream_idle_timeout" json:"streamIdleTimeout"`
	// EnableTLSStreaming indicates to enable the TLS streaming support.
	EnableTLSStreaming bool `toml:"enable_tls_streaming" json:"enableTLSStreaming"`
	// X509KeyPairStreaming is an x509 key pair used for TLS streaming.
	X509KeyPairStreaming `toml:"x509_key_pair_streaming" json:"x509KeyPairStreaming"`
}

const (
	// RuntimeUntrusted is the implicit runtime defined for ContainerdConfig.UntrustedWorkloadRuntime
	// NOTE(review): ContainerdConfig as declared in this file has no
	// UntrustedWorkloadRuntime field; this presumably refers to a legacy
	// option — confirm before relying on it.
	RuntimeUntrusted = "untrusted"
	// RuntimeDefault is the implicit runtime defined for ContainerdConfig.DefaultRuntime
	// NOTE(review): ContainerdConfig as declared in this file only has
	// DefaultRuntimeName, not DefaultRuntime — presumably a legacy name; confirm.
	RuntimeDefault = "default"
	// KeyModelNode is the key model where keys for encrypted images reside
	// on the worker nodes.
	KeyModelNode = "node"
)

// ValidateImageConfig checks the registry and pull-timeout settings of the
// given image configuration.
//
// It returns the deprecation warnings collected so far together with an
// error when `mirrors` is combined with `config_path`, when a key of
// `registry.auths` cannot be parsed as a URL, or when
// `image_pull_progress_timeout` is not a valid Go duration string.
//
// As a side effect, every entry of the deprecated `registry.auths` table is
// copied into `registry.configs` (allocating the map when it is nil), keyed
// by the URL host when the endpoint carries a scheme and by the raw endpoint
// otherwise.
func ValidateImageConfig(ctx context.Context, c *ImageConfig) ([]deprecation.Warning, error) {
	var (
		found []deprecation.Warning
		reg   = &c.Registry
	)

	// `mirrors` is deprecated and mutually exclusive with `config_path`.
	if len(reg.Mirrors) != 0 {
		if reg.ConfigPath != "" {
			return found, errors.New("`mirrors` cannot be set when `config_path` is provided")
		}
		found = append(found, deprecation.CRIRegistryMirrors)
		log.G(ctx).Warning("`mirrors` is deprecated, please use `config_path` instead")
	}

	// This check runs before the auths migration below, so it only reports
	// `configs` entries that were present in the incoming configuration.
	if len(reg.Configs) > 0 {
		found = append(found, deprecation.CRIRegistryConfigs)
		log.G(ctx).Warning("`configs` is deprecated, please use `config_path` instead")
	}

	// Validate the deprecated `auths` table and fold it into `configs`.
	if len(reg.Auths) > 0 {
		if reg.Configs == nil {
			reg.Configs = make(map[string]RegistryConfig)
		}
		for endpoint, auth := range reg.Auths {
			parsed, err := url.Parse(endpoint)
			if err != nil {
				return found, fmt.Errorf("failed to parse registry url %q from `registry.auths`: %w", endpoint, err)
			}
			// The scheme is not part of the key in the registry config.
			key := endpoint
			if parsed.Scheme != "" {
				key = parsed.Host
			}
			entry := reg.Configs[key]
			entry.Auth = &auth
			reg.Configs[key] = entry
		}
		found = append(found, deprecation.CRIRegistryAuths)
		log.G(ctx).Warning("`auths` is deprecated, please use `ImagePullSecrets` instead")
	}

	// An empty image_pull_progress_timeout is accepted without parsing.
	if raw := c.ImagePullProgressTimeout; raw != "" {
		if _, err := time.ParseDuration(raw); err != nil {
			return found, fmt.Errorf("invalid image pull progress timeout: %w", err)
		}
	}

	return found, nil
}

// CheckLocalImagePullConfigs inspects the CRI image config for options that
// cannot be honoured (or must be configured elsewhere) when images are pulled
// through the transfer service. When the first such option is found it sets
// c.UseLocalImagePull to true and logs a warning naming that option; later
// options are not reported. It does nothing when UseLocalImagePull is
// already true.
func CheckLocalImagePullConfigs(ctx context.Context, c *ImageConfig) {
	// Local image pull already requested: there is nothing to fall back from.
	if c.UseLocalImagePull {
		return
	}

	const (
		// MaxConcurrentDownloads counts as configured when it differs from this value.
		// NOTE(review): presumably the default assigned elsewhere in the package — confirm.
		unchangedMaxConcurrentDownloads = 3

		reasonUnsupported = "not supported with transfer service"
		reasonDeprecated  = "not supported with transfer service (also deprecated)"
	)

	// DisableSnapshotAnnotations counts as configured when it is true on
	// Windows, and when it is false on every other OS.
	snapshotAnnotationsConfigured := !c.DisableSnapshotAnnotations
	if gruntime.GOOS == "windows" {
		snapshotAnnotationsConfigured = c.DisableSnapshotAnnotations
	}

	// The cases are checked top to bottom; only the first match is reported.
	var name, reason string
	switch {
	case snapshotAnnotationsConfigured:
		name = "DisableSnapshotAnnotations"
		reason = "moved to snapshotter plugin when using transfer service"
	case c.DiscardUnpackedLayers:
		name, reason = "DiscardUnpackedLayers", reasonUnsupported
	case len(c.Registry.Mirrors) > 0:
		name, reason = "Registry.Mirrors", reasonDeprecated
	case len(c.Registry.Configs) > 0:
		name, reason = "Registry.Configs", reasonDeprecated
	case len(c.Registry.Auths) > 0:
		name, reason = "Registry.Auths", reasonDeprecated
	case c.MaxConcurrentDownloads != unchangedMaxConcurrentDownloads:
		name = "MaxConcurrentDownloads"
		reason = "must be configured in transfer service plugin: plugins.\"io.containerd.transfer.v1.local\""
	case c.ImagePullWithSyncFs:
		name, reason = "ImagePullWithSyncFs", reasonUnsupported
	default:
		// No conflicting option: keep using the transfer service.
		return
	}

	// Fall back to local image pull.
	c.UseLocalImagePull = true
	log.G(ctx).Warnf(
		"Found '%s' in CRI config which is incompatible with transfer service (%s). "+
			"Falling back to local image pull mode.",
		name,
		reason,
	)
}

// ValidateRuntimeConfig validates the given runtime configuration and fills in
// a few defaults in place.
//
// Besides validating, it mutates c:
//   - a nil `containerd.runtimes` map is replaced with an empty one;
//   - a deprecated `cni.bin_dir` value is moved into `cni.bin_dirs` when
//     `bin_dirs` is empty or still equals the default value;
//   - runtimes without a `sandboxer` are given the pod sandbox mode.
//
// It returns the deprecation warnings collected up to the point of return,
// together with the first validation error encountered, if any.
func ValidateRuntimeConfig(ctx context.Context, c *RuntimeConfig) ([]deprecation.Warning, error) {
	var warnings []deprecation.Warning
	if c.ContainerdConfig.Runtimes == nil {
		c.ContainerdConfig.Runtimes = make(map[string]Runtime)
	}

	// Validation for default_runtime_name
	if c.ContainerdConfig.DefaultRuntimeName == "" {
		return warnings, errors.New("`default_runtime_name` is empty")
	}
	if _, ok := c.ContainerdConfig.Runtimes[c.ContainerdConfig.DefaultRuntimeName]; !ok {
		return warnings, fmt.Errorf("no corresponding runtime configured in `containerd.runtimes` for `containerd` `default_runtime_name = \"%s\"`", c.ContainerdConfig.DefaultRuntimeName)
	}

	// Validation for CNI config
	if len(c.CniConfig.NetworkPluginBinDir) != 0 {
		warnings = append(warnings, deprecation.CRICNIBinDir)
		log.G(ctx).Warning("`bin_dir` is deprecated, please use `bin_dirs` instead")

		if slices.Equal(c.CniConfig.NetworkPluginBinDirs, defaultNetworkPluginBinDirs()) {
			// if a user set `bin_dir` explicitly, we remove the default value of `bin_dirs`
			// to avoid the unexpected conflict between the two since we don't allow setting both.
			c.CniConfig.NetworkPluginBinDirs = nil
		}
		if len(c.CniConfig.NetworkPluginBinDirs) == 0 {
			// Before `NetworkPluginBinDir` is deprecated and removed, we manually move it
			// into `NetworkPluginBinDirs` (if `NetworkPluginBinDirs` is empty)
			// so that we can use it in the rest of the code.
			c.CniConfig.NetworkPluginBinDirs = []string{c.CniConfig.NetworkPluginBinDir}
			c.CniConfig.NetworkPluginBinDir = ""
		}
	}
	// `bin_dir` is still set here only if `bin_dirs` held a non-default,
	// non-empty value above, i.e. the user configured both.
	if len(c.CniConfig.NetworkPluginBinDirs) != 0 && len(c.CniConfig.NetworkPluginBinDir) != 0 {
		return warnings, errors.New("`cni.bin_dir` and `cni.bin_dirs` cannot be set at the same time")
	}

	// Per-runtime validation. Every error names the offending runtime so that
	// a configuration with several runtimes can be fixed without guessing.
	for k, r := range c.ContainerdConfig.Runtimes {
		if r.CgroupWritable && !opts.IsCgroup2UnifiedMode() {
			return warnings, fmt.Errorf("runtime %s: `cgroup_writable` is only supported on cgroup v2", k)
		}

		if !r.PrivilegedWithoutHostDevices && r.PrivilegedWithoutHostDevicesAllDevicesAllowed {
			return warnings, fmt.Errorf("runtime %s: `privileged_without_host_devices_all_devices_allowed` requires `privileged_without_host_devices` to be enabled", k)
		}
		// If empty, use default podSandbox mode
		if len(r.Sandboxer) == 0 {
			r.Sandboxer = string(ModePodSandbox)
			// r is a copy of the map value, so the default has to be stored back.
			c.ContainerdConfig.Runtimes[k] = r
		}

		// The io_type default below is applied to the loop-local copy only; it
		// is used for the check that follows and is not written back to c.
		if len(r.IOType) == 0 {
			r.IOType = IOTypeFifo
		}
		// NOTE(review): the message names `named_pipe` while the accepted
		// constant is IOTypeFifo; confirm the literal matches the constant's value.
		if r.IOType != IOTypeStreaming && r.IOType != IOTypeFifo {
			return warnings, fmt.Errorf("runtime %s: `io_type` can only be `streaming` or `named_pipe`", k)
		}
	}

	// Validation for drain_exec_sync_io_timeout
	if c.DrainExecSyncIOTimeout != "" {
		if _, err := time.ParseDuration(c.DrainExecSyncIOTimeout); err != nil {
			return warnings, fmt.Errorf("invalid `drain_exec_sync_io_timeout`: %w", err)
		}
	}
	if err := ValidateEnableUnprivileged(ctx, c); err != nil {
		return warnings, err
	}
	return warnings, nil
}

// ValidateServerConfig validates the given server configuration.
//
// The only check performed is that a non-empty stream_idle_timeout parses as a
// Go duration. No deprecation warnings are produced, so the returned slice is
// always nil.
func ValidateServerConfig(ctx context.Context, c *ServerConfig) ([]deprecation.Warning, error) {
	// An empty value is accepted as-is; only a configured value has to parse.
	if idle := c.StreamIdleTimeout; idle != "" {
		_, err := time.ParseDuration(idle)
		if err != nil {
			return nil, fmt.Errorf("invalid stream idle timeout: %w", err)
		}
	}
	return nil, nil
}

// GetSandboxRuntime returns the configured Runtime to use for a pod sandbox.
//
// The runtime name is chosen as follows:
//   - if the sandbox carries the untrusted-workload annotation, the name is
//     RuntimeUntrusted; in that case runtimeHandler must be empty or equal to
//     RuntimeUntrusted, and the sandbox must not request host namespaces;
//   - otherwise runtimeHandler is used, falling back to DefaultRuntimeName
//     when it is empty.
//
// An error is returned when those rules are violated or when no runtime is
// configured under the chosen name.
func (config *Config) GetSandboxRuntime(podSandboxConfig *runtime.PodSandboxConfig, runtimeHandler string) (Runtime, error) {
	name := runtimeHandler

	if untrustedWorkload(podSandboxConfig) {
		// With the untrusted annotation, the only explicit handler accepted is
		// the untrusted runtime itself.
		if !(runtimeHandler == "" || runtimeHandler == RuntimeUntrusted) {
			return Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed")
		}

		// An untrusted workload asking for access to the host/node is rejected.
		//
		// Note: privileged is not checked here, since the runtime may support
		// it. For example, in a virtual-machine isolated runtime, privileged
		// grants access to the entire guest VM instead of the host.
		// TODO(windows): Deprecate this so that we don't need to handle it for windows.
		if hostAccessingSandbox(podSandboxConfig) {
			return Runtime{}, errors.New("untrusted workload with host access is not allowed")
		}

		name = RuntimeUntrusted
	}

	if name == "" {
		name = config.DefaultRuntimeName
	}

	if r, ok := config.Runtimes[name]; ok {
		return r, nil
	}
	return Runtime{}, fmt.Errorf("no runtime for %q is configured", name)
}

// untrustedWorkload reports whether the sandbox is marked as an untrusted
// workload, i.e. its UntrustedWorkload annotation is exactly "true".
func untrustedWorkload(config *runtime.PodSandboxConfig) bool {
	value, found := config.GetAnnotations()[annotations.UntrustedWorkload]
	return found && value == "true"
}

// hostAccessingSandbox reports whether the sandbox configuration asks for
// host access, meaning that its Linux security context sets the network, PID
// or IPC namespace to NODE mode.
func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool {
	nsOpts := config.GetLinux().GetSecurityContext().GetNamespaceOptions()

	return nsOpts.GetNetwork() == runtime.NamespaceMode_NODE ||
		nsOpts.GetPid() == runtime.NamespaceMode_NODE ||
		nsOpts.GetIpc() == runtime.NamespaceMode_NODE
}

// GenerateRuntimeOptions generates runtime options from cri plugin config.
//
// It returns (nil, nil) when r.Options is nil. Otherwise r.Options is
// marshalled to TOML and decoded into the options value that
// getRuntimeOptionsType selects for r.Type, and that value is returned.
// Marshal and unmarshal failures are both returned wrapped with the runtime
// type so the caller can tell which runtime's options are malformed.
func GenerateRuntimeOptions(r Runtime) (interface{}, error) {
	if r.Options == nil {
		return nil, nil
	}

	// Round-trip through TOML to convert r.Options into the typed options value.
	b, err := toml.Marshal(r.Options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal TOML blob for runtime %q: %w", r.Type, err)
	}

	options := getRuntimeOptionsType(r.Type)
	if err := toml.Unmarshal(b, options); err != nil {
		return nil, fmt.Errorf("failed to unmarshal TOML blob for runtime %q: %w", r.Type, err)
	}

	// For generic configuration, if no config path specified (preserving old behavior), pass
	// the whole TOML configuration section to the runtime.
	if runtimeOpts, ok := options.(*runtimeoptions.Options); ok && runtimeOpts.ConfigPath == "" {
		runtimeOpts.ConfigBody = b
	}

	return options, nil
}

// getRuntimeOptionsType returns a pointer to an empty options value matching
// the runtime type name: runc options for plugins.RuntimeRuncV2, runhcs
// options for plugins.RuntimeRunhcsV1, and the generic runtime options for
// every other type.
func getRuntimeOptionsType(t string) interface{} {
	if t == plugins.RuntimeRuncV2 {
		return &runcoptions.Options{}
	}
	if t == plugins.RuntimeRunhcsV1 {
		return &runhcsoptions.Options{}
	}
	// Any other runtime type gets the generic options.
	return &runtimeoptions.Options{}
}

// DefaultServerConfig returns the default server configuration: the TCP
// service is disabled, the stream server address is "127.0.0.1" with port
// "0", TLS streaming is off with no key pair configured, and the stream idle
// timeout is taken from the kubelet streaming package's default config.
func DefaultServerConfig() ServerConfig {
	var cfg ServerConfig

	cfg.DisableTCPService = true
	cfg.StreamServerAddress = "127.0.0.1"
	cfg.StreamServerPort = "0"
	// Reuse the streaming package default (noted as 4 hours in the original
	// comment; confirm against the vendored version).
	cfg.StreamIdleTimeout = streaming.DefaultConfig.StreamIdleTimeout.String()

	// TLS streaming stays off and no key pair is configured by default.
	cfg.EnableTLSStreaming = false
	cfg.X509KeyPairStreaming = X509KeyPairStreaming{}

	return cfg
}
